{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from tqdm import tqdm\n",
    "from unidecode import unidecode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# !wget https://f000.backblazeb2.com/file/malay-dataset/parsing/indonesia-conll/Indonesian/id-common_crawl-000.conllu.jsonl\n",
    "# !wget https://f000.backblazeb2.com/file/malay-dataset/parsing/indonesia-conll/Indonesian/id-common_crawl-001.conllu.jsonl\n",
    "# !wget https://f000.backblazeb2.com/file/malay-dataset/parsing/indonesia-conll/Indonesian/id-wikipedia-000.conllu.jsonl\n",
    "# !wget https://f000.backblazeb2.com/file/malay-dataset/parsing/indonesia-conll/Indonesian/id-wikipedia-001.conllu.jsonl\n",
    "# !wget https://f000.backblazeb2.com/file/malay-dataset/parsing/indonesia-conll/Indonesian/id-wikipedia-002.conllu.jsonl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[\"# text = Hotel di dekat Cadogan Hall\\n\", \"1\\tHotel\\t_\\tNOUN\\t_\\t_\\t0\\troot\\t_\\t_\\n\", \"2\\tdi\\t_\\tADP\\t_\\t_\\t4\\tcase\\t_\\t_\\n\", \"3\\tdekat\\t_\\tADJ\\t_\\t_\\t4\\tamod\\t_\\t_\\n\", \"4\\tCadogan\\t_\\tPROPN\\t_\\t_\\t1\\tnmod\\t_\\t_\\n\", \"5\\tHall\\t_\\tPROPN\\t_\\t_\\t4\\tflat\\t_\\t_\\n\"]\r\n",
      "[\"# text = Ini bisa menjadi masalah hardware atau masalah perangkat lunak.\\n\", \"1\\tIni\\t_\\tDET\\t_\\t_\\t3\\tnsubj\\t_\\t_\\n\", \"2\\tbisa\\t_\\tADV\\t_\\t_\\t3\\tadvmod\\t_\\t_\\n\", \"3\\tmenjadi\\t_\\tVERB\\t_\\t_\\t0\\troot\\t_\\t_\\n\", \"4\\tmasalah\\t_\\tNOUN\\t_\\t_\\t3\\tobj\\t_\\t_\\n\", \"5\\thardware\\t_\\tNOUN\\t_\\t_\\t4\\tcompound\\t_\\t_\\n\", \"6\\tatau\\t_\\tCCONJ\\t_\\t_\\t7\\tcc\\t_\\t_\\n\", \"7\\tmasalah\\t_\\tNOUN\\t_\\t_\\t4\\tconj\\t_\\t_\\n\", \"8\\tperangkat\\t_\\tNOUN\\t_\\t_\\t7\\tcompound\\t_\\t_\\n\", \"9\\tlunak\\t_\\tADJ\\t_\\t_\\t8\\tamod\\t_\\tSpaceAfter=No\\n\", \"10\\t.\\t_\\tPUNCT\\t_\\t_\\t3\\tpunct\\t_\\t_\\n\"]\r\n"
     ]
    }
   ],
   "source": [
    "!head -n 2 id-common_crawl-000.conllu.jsonl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "853816 id-common_crawl-000.conllu.jsonl\n",
      "1451051 id-wikipedia-000.conllu.jsonl\n",
      "1788269 id-wikipedia-001.conllu.jsonl\n",
      "1765019 id-wikipedia-002.conllu.jsonl\n"
     ]
    }
   ],
   "source": [
    "!wc -l id-common_crawl-000.conllu.jsonl\n",
    "!wc -l id-wikipedia-000.conllu.jsonl\n",
    "!wc -l id-wikipedia-001.conllu.jsonl\n",
    "!wc -l id-wikipedia-002.conllu.jsonl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "tag2idx = {'PAD': 0,\n",
    " 'X': 1,\n",
    " 'nsubj': 2,\n",
    " 'cop': 3,\n",
    " 'det': 4,\n",
    " 'root': 5,\n",
    " 'nsubj:pass': 6,\n",
    " 'acl': 7,\n",
    " 'case': 8,\n",
    " 'obl': 9,\n",
    " 'flat': 10,\n",
    " 'punct': 11,\n",
    " 'appos': 12,\n",
    " 'amod': 13,\n",
    " 'compound': 14,\n",
    " 'advmod': 15,\n",
    " 'cc': 16,\n",
    " 'obj': 17,\n",
    " 'conj': 18,\n",
    " 'mark': 19,\n",
    " 'advcl': 20,\n",
    " 'nmod': 21,\n",
    " 'nummod': 22,\n",
    " 'dep': 23,\n",
    " 'xcomp': 24,\n",
    " 'ccomp': 25,\n",
    " 'parataxis': 26,\n",
    " 'compound:plur': 27,\n",
    " 'fixed': 28,\n",
    " 'aux': 29,\n",
    " 'csubj': 30,\n",
    " 'iobj': 31,\n",
    " 'csubj:pass': 32}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_train(group):\n",
    "    texts, arcs, tags = [], [], []\n",
    "    for g in group:\n",
    "        splitted = g.split('\\t')\n",
    "        texts.append(unidecode(splitted[1]))\n",
    "        arcs.append(int(splitted[6]))\n",
    "        tags.append(tag2idx[splitted[7]])\n",
    "        \n",
    "    return texts, arcs, tags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['id-common_crawl-000.conllu.jsonl',\n",
       " 'id-common_crawl-001.conllu.jsonl',\n",
       " 'id-wikipedia-000.conllu.jsonl',\n",
       " 'id-wikipedia-001.conllu.jsonl',\n",
       " 'id-wikipedia-002.conllu.jsonl']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from glob import glob\n",
    "\n",
    "files = sorted(glob('*.conllu.jsonl'))\n",
    "files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id-common_crawl-000.conllu.jsonl\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "853816it [00:18, 46091.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id-common_crawl-001.conllu.jsonl\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "828815it [00:18, 45351.89it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id-wikipedia-000.conllu.jsonl\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1451051it [00:23, 61154.67it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id-wikipedia-001.conllu.jsonl\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1788269it [00:25, 70645.21it/s] \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id-wikipedia-002.conllu.jsonl\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1765019it [00:24, 71442.17it/s]\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "with open('train.json', 'w') as train:\n",
    "    for f in files:\n",
    "        print(f)\n",
    "        count = 0\n",
    "        with open(f) as fopen:\n",
    "            for l in tqdm(fopen):\n",
    "                data = json.loads(l)\n",
    "                data = [d for d in data if d[0] != '#']\n",
    "                if len(data) > 200:\n",
    "                    # print(data)\n",
    "                    continue\n",
    "                \n",
    "                try:\n",
    "                    texts, arcs, tags = get_train(data)\n",
    "                    if any([len(t) > 50 for t in texts]) and len(data) > 120:\n",
    "                        # print(data)\n",
    "                        continue\n",
    "                        \n",
    "                    d = {'translation': {\n",
    "                        'texts': texts,\n",
    "                        'arcs': arcs,\n",
    "                        'tags': tags,\n",
    "                    }}\n",
    "                    train.write(f'{json.dumps(d)}\\n')\n",
    "                    count += 1\n",
    "                    \n",
    "#                     if count == 5000:\n",
    "#                         break\n",
    "                        \n",
    "                except Exception as e:\n",
    "                    print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['1\\t31\\t_\\tNUM\\t_\\tNumType=Card\\t2\\tnummod\\t_\\t_\\n',\n",
       " '2\\tJeff\\t_\\tPROPN\\t_\\t_\\t0\\troot\\t_\\t_\\n',\n",
       " '3\\tBurton\\t_\\tPROPN\\t_\\t_\\t2\\tflat\\t_\\t_\\n',\n",
       " '4\\tRichard\\t_\\tPROPN\\t_\\t_\\t3\\tflat\\t_\\t_\\n',\n",
       " '5\\tChildress\\t_\\tPROPN\\t_\\t_\\t4\\tflat\\t_\\t_\\n',\n",
       " '6\\tRacing\\t_\\tPROPN\\t_\\t_\\t5\\tflat\\t_\\t_\\n',\n",
       " '7\\tChevrolet\\t_\\tPROPN\\t_\\t_\\t6\\tflat\\t_\\t_\\n',\n",
       " '8\\t46.686\\t_\\tNUM\\t_\\tNumType=Card\\t7\\tnummod\\t_\\t_\\n',\n",
       " '9\\t192.777\\t_\\tNUM\\t_\\tNumType=Card\\t7\\tnummod\\t_\\t_\\n']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "!shuf train.json > shuffled-train.json\n",
    "!head -n 1000 shuffled-train.json > test.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6686644 shuffled-train.json\r\n"
     ]
    }
   ],
   "source": [
    "!wc -l shuffled-train.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"translation\": {\"texts\": [\"Penyakit\", \"ini\", \"dinamai\", \"dari\", \"Moritz\", \"Kaposi\", \"(\", \"1837-1902\", \")\", \",\", \"seorang\", \"ahli\", \"ilmu\", \"penyakit\", \"kulit\", \"Hongaria\", \"yang\", \"pertama\", \"kali\", \"menjelaskan\", \"gejala\", \"penyakit\", \"ini\", \"pada\", \"tahun\", \"1872\", \".\"], \"arcs\": [3, 1, 0, 5, 3, 5, 8, 5, 8, 12, 12, 5, 12, 13, 14, 15, 18, 12, 18, 3, 20, 21, 21, 25, 20, 25, 3], \"tags\": [6, 4, 5, 8, 9, 10, 11, 12, 11, 11, 4, 18, 14, 14, 14, 10, 2, 13, 15, 20, 17, 14, 4, 8, 9, 22, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"\\\"\", \"Secret\", \"of\", \"Success\", \"\\\"\"], \"arcs\": [2, 0, 2, 3, 2], \"tags\": [11, 5, 21, 21, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Pada\", \"tanggal\", \"27\", \"Oktober\", \",\", \"Laksamana\", \"Inggris-Seymour\", \"(\", \"Michael\", \"Seymour\", \")\", \"mengirim\", \"surat\", \"kepada\", \"Ye\", \"Ming\", \"Chen\", \"dengan\", \"mengulangi\", \"permintaan\", \"untuk\", \"memasuki\", \"kota\", \";\", \"karena\", \"tidak\", \"mendapat\", \"jawaban\", \"dari\", \"Ye\", \"Ming\", \"Chen\", \",\", \"Kantor\", \"Gubernur\", \"dibom\", \"sekali\", \"dalam\", \"5~7\", \"menit\", \".\"], \"arcs\": [2, 12, 4, 2, 2, 12, 6, 9, 6, 9, 9, 0, 12, 15, 12, 15, 16, 19, 12, 19, 22, 19, 22, 12, 27, 27, 12, 27, 30, 27, 30, 31, 34, 27, 34, 35, 36, 40, 40, 36, 12], \"tags\": [8, 9, 22, 21, 11, 2, 10, 11, 12, 10, 11, 5, 17, 8, 9, 10, 10, 8, 24, 17, 8, 24, 17, 11, 19, 15, 20, 17, 8, 9, 10, 10, 11, 17, 10, 14, 15, 8, 22, 21, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Copernicus\", \"Books\", \"(\", \"Springer\", \"Verlag\", \")\"], \"arcs\": [0, 1, 4, 1, 4, 4], \"tags\": [5, 10, 11, 12, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Carlsson\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"SkyscraperPage\", \"Diagram\", \"of\", \"Dubai\", \"in\", \"2003\"], \"arcs\": [0, 1, 2, 3, 4, 5], \"tags\": [5, 21, 21, 10, 10, 22]}}\r\n",
      "{\"translation\": {\"texts\": [\"Jumlah\", \"penduduknya\", \"berlipat\", \"dalam\", \"30\", \"tahun\", \",\", \"juga\", \"di\", \"wilayah\", \"besar\", \"di\", \"timur\", \"daerah\", \"Paris\", \".\"], \"arcs\": [3, 1, 0, 6, 6, 3, 3, 3, 10, 3, 10, 13, 10, 13, 14, 3], \"tags\": [2, 14, 5, 8, 22, 9, 11, 15, 8, 9, 13, 8, 21, 14, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Lalu\", \"bagaimana\", \"jika\", \"si\", \"murid\", \"hanya\", \"punya\", \"satu\", \"sepatu\", \"berwarna\", \"hitam\", \"dan\", \"karena\", \"kemarin\", \"hujan\", \",\", \"sepatunya\", \"belum\", \"kering\", \",\", \"bolehkah\", \"ia\", \"upacara\", \"dengan\", \"bertelanjang\", \"kaki\", \"?\"], \"arcs\": [7, 7, 7, 5, 7, 7, 0, 9, 7, 9, 10, 19, 19, 19, 14, 19, 19, 19, 7, 7, 22, 7, 22, 25, 23, 25, 7], \"tags\": [19, 15, 19, 4, 2, 15, 5, 4, 17, 7, 13, 16, 19, 21, 14, 11, 2, 15, 18, 11, 19, 20, 13, 8, 7, 17, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Fase\", \"kemunduran\", \"(\", \"decline\", \")\", \",\", \"masa\", \"sesudah\", \"pension\", \"atau\", \"melepaskan\", \"jabatan\", \"tertentu\", \".\"], \"arcs\": [0, 1, 4, 1, 4, 7, 1, 9, 7, 11, 1, 11, 12, 1], \"tags\": [5, 14, 11, 12, 11, 11, 18, 8, 21, 16, 18, 17, 4, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"HDPE\", \"bisa\", \"diproduksi\", \"dengan\", \"katalis\", \"kromium/silika\", \",\", \"katalis\", \"Ziegler-Natta\", \",\", \"atau\", \"katalis\", \"metallocene\", \".\"], \"arcs\": [3, 3, 0, 5, 3, 5, 8, 5, 8, 12, 12, 5, 12, 3], \"tags\": [6, 15, 5, 8, 9, 14, 11, 18, 10, 11, 16, 18, 14, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Pendar\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Hlm\", \".\", \"75\", \".\"], \"arcs\": [0, 1, 1, 1], \"tags\": [5, 11, 22, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"menyimpang\", \"dari\", \"arahan\", \"kebijakan\", \"konstitusi\", \"HIMAPENA\", \",\", \"maka\", \"dewan\", \"komisaris\", \"berkewajiban\", \"mengelarukan\", \"memorandum\", \"pertama\"], \"arcs\": [0, 3, 1, 3, 4, 5, 1, 11, 11, 9, 1, 11, 12, 13], \"tags\": [5, 8, 9, 14, 14, 10, 11, 19, 6, 13, 20, 24, 17, 22]}}\r\n",
      "{\"translation\": {\"texts\": [\"6\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Hampir\", \"seluruh\", \"sekolah\", \"dan\", \"universitas\", \"di\", \"Jakarta\", \",\", \"tempat\", \"diadakannya\", \"Sidang\", \"Istimewa\", \"tersebut\", \",\", \"diliburkan\", \"untuk\", \"mencegah\", \"mahasiswa\", \"berkumpul\", \".\"], \"arcs\": [3, 3, 10, 5, 3, 7, 5, 10, 10, 0, 10, 11, 11, 10, 10, 17, 15, 17, 18, 10], \"tags\": [15, 4, 9, 16, 18, 8, 21, 11, 2, 5, 17, 10, 4, 11, 26, 8, 24, 17, 7, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"*\", \"*\", \"garis\", \"Biru\", \"tersedia\", \"kelas\", \"EKSEKUTIF\", \"dan\", \"BISNIS\"], \"arcs\": [3, 3, 0, 5, 3, 5, 6, 9, 6], \"tags\": [11, 11, 5, 2, 26, 17, 10, 16, 18]}}\r\n",
      "{\"translation\": {\"texts\": [\"Artinya\", \":\", \"Lalu\", \"dia\", \"dihadapkan\", \"ke\", \"arah\", \"timur\", \"laut\"], \"arcs\": [0, 1, 5, 5, 1, 7, 5, 7, 8], \"tags\": [5, 11, 19, 6, 7, 8, 9, 14, 14]}}\r\n",
      "{\"translation\": {\"texts\": [\"Alberto\", \"Pomini\"], \"arcs\": [0, 1], \"tags\": [5, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Pesannya\", \"apa\", \"(\", \"says\", \"what\", \")\"], \"arcs\": [0, 1, 4, 2, 4, 4], \"tags\": [5, 14, 11, 12, 14, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"6\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"yaitu\", \"dengan\", \"cara\", \"warga\", \"desa\", \"Kriyan\", \"diharapkan\", \"memakai\", \"pakaian\", \"adat\", \"Jawa\", \",\", \"meskipun\", \"minimal\", \"yang\", \"pria\", \"menggunakan\", \"blangkon\", \"dikombinasikan\", \"dengan\", \"kemeja/kaos/baju\", \"koko\", \"dan\", \"bagi\", \"yang\", \"wanita\", \"menggunakan\", \"baju\", \"kebaya\", \"yang\", \"dikombinasikan\", \"dengan\", \"celana\", \"panjang\", \".\", \"kecuali\", \"pada\", \"event-event\", \"tertentu\", \"diusahakan\", \"memakai\", \"pakaian\", \"adat\", \"jawa\", \"secara\", \"lengkap\", \"seperti\", \"menghadiri\", \"sedekah\", \"Bumi\", \",\", \"Pesta\", \"Baratan\", \",\", \"dll\", \".\"], \"arcs\": [3, 3, 7, 3, 3, 5, 0, 7, 8, 9, 10, 7, 14, 7, 17, 17, 14, 17, 14, 21, 19, 21, 27, 27, 27, 27, 19, 27, 28, 31, 28, 33, 31, 33, 7, 40, 38, 40, 38, 7, 40, 41, 42, 43, 41, 41, 48, 41, 50, 48, 50, 50, 52, 52, 52, 40], \"tags\": [15, 8, 6, 14, 14, 10, 5, 24, 17, 14, 10, 11, 19, 20, 17, 2, 7, 17, 20, 8, 9, 14, 16, 8, 9, 2, 18, 17, 14, 2, 7, 8, 9, 13, 11, 16, 8, 9, 4, 23, 24, 17, 14, 14, 8, 13, 8, 24, 4, 17, 11, 12, 10, 11, 12, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Kategori\", \":\", \"Tempat\"], \"arcs\": [0, 1, 1], \"tags\": [5, 11, 14]}}\r\n",
      "{\"translation\": {\"texts\": [\"K\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Promotor\", \"Chris\", \"Brown\", \":\", \"Kami\", \"Sudah\", \"Sesuai\", \"Prosedur\", \"!\"], \"arcs\": [0, 1, 2, 1, 1, 5, 6, 7, 5], \"tags\": [5, 10, 10, 11, 12, 10, 10, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Kategori\", \":\", \"Permukiman\", \"di\", \"Dubai\"], \"arcs\": [0, 1, 1, 5, 3], \"tags\": [5, 11, 12, 8, 21]}}\r\n",
      "{\"translation\": {\"texts\": [\"studio\", \"pribadi\", \"ditugaskan\", \"Peter\", \"Reichelt\", \",\", \"seorang\", \"produser\", \"pameran\", \"museum\", \"dari\", \"Mannheim\", \",\", \"Jerman\", \",\", \"sebagai\", \"agennya\", \"untuk\", \"Eropa\", \".\"], \"arcs\": [3, 1, 0, 3, 4, 3, 8, 3, 8, 9, 12, 8, 12, 12, 8, 17, 8, 19, 17, 3], \"tags\": [2, 13, 5, 17, 10, 11, 4, 12, 14, 10, 8, 21, 11, 12, 11, 8, 21, 8, 21, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Identifikasi\", \"Ciri-ciri\", \"Tubuh\", \"dalam\", \"Skandal\", \"Sex\", \"Anggota\", \"Komisi\", \"IX\", \"DPR\", \"RI\"], \"arcs\": [0, 1, 2, 5, 1, 5, 6, 7, 8, 1, 10], \"tags\": [5, 10, 10, 8, 21, 10, 10, 10, 10, 10, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Candi\", \"Borobudur\"], \"arcs\": [0, 1], \"tags\": [5, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Hukum\", \"Mengakhirkan\", \"Sholat\", \"Isya\", \"'\"], \"arcs\": [0, 1, 2, 3, 1], \"tags\": [5, 10, 10, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Kematiannya\", \"konon\", \"terjadi\", \"pada\", \"saat\", \"ia\", \"melahirkan\", \",\", \"namun\", \"tanggal\", \"yang\", \"ada\", \"tidak\", \"menyebutkan\", \"teori\", \"tersebut\", \",\", \"yang\", \"tidak\", \"dibenarkan\", \"di\", \"dalam\", \"beberapa\", \"artikel\", \".\"], \"arcs\": [3, 1, 0, 5, 7, 7, 3, 3, 14, 14, 12, 10, 14, 3, 14, 15, 3, 20, 20, 3, 24, 24, 24, 20, 3], \"tags\": [2, 14, 5, 8, 9, 2, 24, 11, 19, 2, 2, 7, 15, 20, 17, 4, 11, 6, 15, 7, 8, 8, 4, 9, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"GSP\", \"FM\", \"95.2\", \"FM\", \"-\", \"Kutoarjo\", \"Purworejo\"], \"arcs\": [0, 1, 2, 2, 4, 4, 6], \"tags\": [5, 10, 22, 10, 11, 10, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Perekonomian\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Rangkuman\", \"Materi\", \"UN\", \"Bahasa\", \"Indonesia\", \"SMP\"], \"arcs\": [6, 1, 2, 3, 4, 0], \"tags\": [6, 10, 10, 10, 10, 5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Muawiyah\", \"menjadi\", \"Khalifah\", \"dan\", \"mendirikan\", \"Bani\", \"Umayyah\"], \"arcs\": [2, 0, 2, 5, 2, 5, 6], \"tags\": [2, 5, 17, 16, 18, 17, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Pranala\", \"luar\"], \"arcs\": [0, 1], \"tags\": [5, 13]}}\r\n",
      "{\"translation\": {\"texts\": [\"bawaanmu\", \"!\"], \"arcs\": [0, 1], \"tags\": [5, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Hasil\", \"Dinas\", \"Kependudukan\", \"dan\", \"Pencatatan\", \"Sipil\", \"2013\", \",\", \"jumlah\", \"penduduk\", \"Kota\", \"Pasuruan\", \"bejumlah\", \"208.079\", \"jiwa\", \"dengan\", \"kepadatan\", \"penduduk\", \"sekitar\", \"+-5.091\", \"jiwa/km2\", \".\"], \"arcs\": [0, 1, 2, 5, 2, 5, 6, 9, 2, 9, 10, 11, 2, 15, 13, 17, 13, 17, 21, 21, 17, 1], \"tags\": [5, 10, 10, 16, 18, 10, 22, 11, 18, 14, 10, 10, 18, 22, 17, 8, 9, 14, 8, 22, 21, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Pada\", \"wawancara\", \"kemudian\", \"pada\", \"tahun\", \"itu\", \"juga\", \",\", \"Borland\", \",\", \"yang\", \"juga\", \"punya\", \"proyek\", \"sampingan\", \"bernama\", \"Big\", \"Dumb\", \"Face\", \",\", \"memperlihatkan\", \"kekesalannya\", \"dengan\", \"Limp\", \"Bizkit\", \".\"], \"arcs\": [2, 13, 5, 5, 2, 5, 9, 9, 2, 13, 13, 13, 0, 13, 14, 14, 16, 17, 18, 13, 13, 21, 24, 21, 24, 13], \"tags\": [8, 9, 15, 8, 21, 4, 15, 11, 12, 11, 2, 15, 5, 17, 14, 7, 17, 10, 10, 11, 20, 17, 8, 9, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Alexander\", \"(\", \"wafat\", \"1420\", \")\"], \"arcs\": [0, 3, 1, 3, 3], \"tags\": [5, 11, 12, 22, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Belgia\", \"di\", \"Kontes\", \"Lagu\", \"Eurovision\"], \"arcs\": [0, 3, 1, 3, 4], \"tags\": [5, 8, 21, 10, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"4\", \"133\", \"?\"], \"arcs\": [2, 0, 2], \"tags\": [22, 5, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Pimprez\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"ALIH\", \"SMA\", \"Negeri\", \"1\", \"Purworejo\"], \"arcs\": [0, 1, 2, 3, 1], \"tags\": [5, 10, 10, 22, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Bambang\", \"Noorsena\", \"menulis\", \"sejak\", \"mahasiswa\", \",\", \"bukunya\", \"yang\", \"terbit\", \"pertama\", \"Telaah\", \"Kristis\", \"Injil\", \"Barnabas\", \".\"], \"arcs\": [3, 1, 0, 5, 3, 3, 3, 9, 7, 9, 9, 11, 12, 13, 3], \"tags\": [2, 10, 5, 8, 9, 11, 26, 2, 7, 13, 17, 10, 10, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"JAM\", \"TANGAN\", \"CASIO\", \"TERBARU\", \"DENGAN\", \"HARGA\", \"MURAH\"], \"arcs\": [0, 1, 2, 1, 4, 5, 6], \"tags\": [5, 10, 10, 10, 10, 10, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"84\", \"MNC\", \"News\"], \"arcs\": [2, 0, 2], \"tags\": [22, 5, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Selain\", \"itu\", \"dapat\", \"digunakan\", \"untuk\", \"melembabkan\", \"bibir\", \"kering\", \",\", \"cukup\", \"dioleskan\", \"dibagian\", \"bibir\", \"yang\", \"kering.2.Pembersih\", \"make-up\", \"-\", \"minyak\", \"jojoba\", \"tidak\", \"menyebabkan\", \"iritasi\", \"mata\", \"atau\", \"alergi\", \"sehingga\", \"Anda\", \"dapat\", \"menggunakannya\", \"untuk\", \"menghilangkan\", \"riasan\", \"mata\", \".\"], \"arcs\": [4, 4, 4, 0, 6, 4, 6, 7, 11, 11, 4, 11, 12, 15, 13, 15, 16, 16, 16, 21, 13, 21, 22, 25, 23, 29, 29, 29, 21, 31, 29, 31, 32, 4], \"tags\": [8, 6, 15, 5, 8, 24, 17, 13, 11, 15, 18, 24, 17, 2, 13, 14, 11, 27, 14, 15, 7, 17, 14, 16, 18, 19, 2, 15, 20, 8, 24, 17, 14, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Remaja\", \"76\", \"(\", \"1976\", \")\", \"disutradarai\", \"oleh\", \"Ismail\", \"Soebardjo\"], \"arcs\": [6, 1, 4, 2, 4, 0, 8, 6, 8], \"tags\": [6, 22, 11, 12, 11, 5, 8, 9, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"16\", \"Januari\", \"2014\", \".\"], \"arcs\": [2, 0, 2, 2], \"tags\": [22, 5, 22, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Seng\", \"Nong\", \"menyatakan\", \",\", \"kriteria\", \"unggul\", \"nilai\", \"atau\", \"manfaat\", \"dari\", \"sebuah\", \"tanaman\", \"obat\", \"adalah\", \"bila\", \"dikonsumsi\", \"dalam\", \"jangka\", \"waktu\", \"lama\", \"tidak\", \"menimbulkan\", \"efek\", \"samping\", \".\"], \"arcs\": [3, 1, 0, 3, 16, 5, 6, 9, 5, 12, 12, 9, 12, 16, 16, 3, 18, 16, 18, 19, 22, 16, 22, 23, 3], \"tags\": [2, 10, 5, 11, 2, 14, 14, 16, 18, 8, 4, 21, 14, 3, 19, 26, 8, 9, 14, 13, 15, 24, 17, 14, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Doresia\", \"Krings\"], \"arcs\": [0, 1], \"tags\": [5, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Gunung\", \"Paseur\", \"425\", \"Pasaman\", \"Barat\"], \"arcs\": [0, 1, 2, 1, 4], \"tags\": [5, 10, 22, 12, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Klinsmann\", \"kecil\", \"juga\", \"turut\", \"membantu\", \"pekerjaan\", \"keluarganya\", \"untuk\", \"menjadi\", \"penjaja\", \"roti\", \"keliling\", \"dan\", \"magang\", \"sebagai\", \"pelayan\", \"yang\", \"dijalani\", \"selain\", \"berlatih\", \"sepak\", \"bola\", \".\"], \"arcs\": [5, 1, 5, 5, 0, 5, 6, 9, 5, 9, 10, 11, 14, 12, 16, 10, 18, 16, 20, 18, 20, 21, 5], \"tags\": [2, 13, 15, 15, 5, 17, 14, 8, 24, 17, 14, 14, 16, 18, 8, 21, 2, 7, 8, 24, 17, 14, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Misalnya\", \",\", \"Sekolah\", \"Teknik\", \"Elektro\", \"dan\", \"Informatika\", \"(\", \"STEI\", \")\", \"ITB\", \"memiliki\", \"5\", \"program\", \"studi\", \",\", \"yaitu\", \"di\", \"lingkup\", \"keelektroteknikan\", \"(\", \"Teknik\", \"Elektro\", \",\", \"Telekomunikasi\", \",\", \"dan\", \"Tenaga\", \"Listrik\", \")\", \",\", \"serta\", \"di\", \"lingkup\", \"ilmu\", \"komputer\", \"(\", \"Teknik\", \"Informatika\", \"dan\", \"Sistem\", \"Teknologi\", \"Informasi\", \")\", \".\"], \"arcs\": [12, 12, 12, 3, 4, 7, 3, 9, 7, 9, 12, 0, 14, 12, 14, 14, 19, 19, 12, 19, 22, 19, 22, 25, 22, 28, 28, 22, 28, 22, 34, 34, 34, 19, 34, 35, 38, 34, 38, 41, 38, 41, 42, 38, 12], \"tags\": [15, 11, 2, 10, 10, 16, 18, 11, 12, 11, 2, 5, 22, 17, 14, 11, 15, 8, 9, 14, 11, 12, 10, 11, 18, 11, 16, 18, 10, 11, 11, 16, 8, 18, 14, 14, 11, 12, 10, 16, 18, 10, 10, 11, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"121\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Sayangnya\", \",\", \"hampir\", \"tak\", \"ada\", \"dokumen\", \"Mesir\", \"asli\", \"tentang\", \"alkimia\", \"yang\", \"masih\", \"tersisa\", \"sekarang\", \".\"], \"arcs\": [5, 5, 5, 5, 0, 5, 6, 7, 10, 6, 13, 13, 10, 13, 5], \"tags\": [15, 11, 15, 15, 5, 17, 10, 13, 8, 21, 2, 15, 7, 17, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Trimakasi\", \"untuk\", \"para\", \"penyusun\", \"KDB\", \",\", \"kalender\", \"ini\", \"sangat\", \"membantu\", \"saya\", \"unt\", \"mengetahui\", \"rerahinan\", \"selagi\", \"berada\", \"di\", \"luar\", \"Bali\", \",\", \"Suksma\", \"16-11-2011\", \".\"], \"arcs\": [10, 4, 4, 1, 4, 10, 10, 7, 10, 0, 10, 13, 10, 16, 16, 13, 18, 16, 18, 16, 16, 21, 10], \"tags\": [26, 8, 4, 9, 10, 11, 2, 4, 15, 5, 17, 8, 24, 2, 15, 25, 8, 9, 10, 11, 22, 22, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Setelah\", \"The\", \"Final\", \"Cut\", \"(\", \"yang\", \"merupakan\", \"final\", \"saga\", \"dari\", \"The\", \"Wall\", \")\", \",\", \"para\", \"anggota\", \"Pink\", \"Floyd\", \"mengeluarkan\", \"album\", \"solo\", \"sendiri-sendiri\", \"sampai\", \"1987\", \",\", \"ketika\", \"Gilmour\", \"dan\", \"Mason\", \"mulai\", \"membangkitkan\", \"kembali\", \"band\", \"ini\", \".\"], \"arcs\": [3, 3, 19, 3, 7, 7, 3, 7, 8, 12, 12, 8, 7, 3, 16, 19, 16, 17, 0, 19, 20, 21, 19, 31, 31, 31, 31, 30, 30, 27, 19, 31, 31, 33, 19], \"tags\": [8, 4, 9, 10, 11, 2, 12, 17, 14, 8, 4, 21, 11, 11, 4, 2, 10, 10, 5, 17, 14, 13, 8, 22, 11, 19, 2, 16, 2, 18, 20, 15, 17, 4, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Kombinasi\", \"dari\", \"nama\", \"jalan\", \"dan\", \"kecepatan\", \"mobil\", \"yang\", \"berhasil\", \"dicapainya\", \"membuat\", \"mereka\", \"menamai\", \"jenis\", \"bahan\", \"bakar\", \"baru\", \"tersebut\", \"sebagai\", \"\\\"\", \"Phillips\", \"66\", \"\\\"\", \",\", \"yang\", \"masih\", \"dipasarkan\", \"sampai\", \"saat\", \"ini\", \".\"], \"arcs\": [11, 3, 1, 3, 6, 1, 6, 9, 1, 9, 0, 13, 11, 13, 14, 15, 16, 14, 21, 21, 13, 21, 21, 13, 27, 27, 24, 29, 27, 29, 11], \"tags\": [2, 8, 21, 14, 16, 18, 14, 2, 7, 17, 5, 2, 24, 17, 14, 14, 13, 4, 8, 11, 9, 22, 11, 11, 6, 15, 7, 8, 9, 4, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Dari\", \"17\", \"Oktober\", \"sampai\", \"20\", \"November\", \",\", \"dia\", \"memainkan\", \"peran\", \"kunci\", \"dalam\", \"Operasi\", \"Leyte\", \",\", \"mencangkup\", \"penghancuran\", \"Armada\", \"Jepang\", \"dalam\", \"Pertempuran\", \"di\", \"Selat\", \"Surigao\", \"(\", \"25\", \"Oktober\", \")\", \".Pada\", \"1\", \"Januari\", \"1945\", \",\", \"dia\", \"berangkat\", \"menuju\", \"Palaus\", \"untuk\", \"pendaratan\", \"di\", \"Luzon\", \".\"], \"arcs\": [3, 3, 9, 6, 6, 3, 9, 9, 0, 9, 10, 13, 10, 13, 9, 9, 16, 17, 18, 21, 17, 23, 16, 23, 27, 27, 23, 27, 16, 31, 29, 31, 35, 35, 16, 37, 35, 39, 37, 41, 37, 9], \"tags\": [8, 22, 9, 8, 22, 21, 11, 2, 5, 17, 14, 8, 21, 10, 11, 24, 17, 14, 10, 8, 21, 8, 9, 10, 11, 22, 12, 11, 18, 22, 10, 22, 11, 2, 18, 8, 9, 8, 21, 8, 21, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Birmensdorf\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"MISI\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"15065\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"R&B/Hip-Hop\", \"Song\", \"(\", \"\\\"\", \"Wild\", \"Ones\", \"\\\"\", \"with\", \"Sia\", \")\"], \"arcs\": [0, 1, 5, 5, 1, 5, 5, 5, 8, 5], \"tags\": [5, 10, 11, 11, 12, 10, 11, 10, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Kategori\", \":\", \"Komune\", \"di\", \"Perancis\"], \"arcs\": [0, 1, 1, 5, 3], \"tags\": [5, 11, 12, 8, 21]}}\r\n",
      "{\"translation\": {\"texts\": [\"Menurut\", \"pendapat\", \"yang\", \"kuat\", \"dari\", \"Jumhur\", \"Ulama\", \"takbiran\", \"'\", \"Idul\", \"Fitri\", \"dapat\", \"dimulai\", \"ketika\", \"hendak\", \"pergi\", \"menuju\", \"shalat\", \"'\", \"Idul\", \"Fitri\", \"sampai\", \"imam\", \"mulai\", \"khutbah\", \".\"], \"arcs\": [2, 13, 4, 2, 6, 2, 6, 6, 8, 8, 10, 13, 0, 16, 16, 13, 18, 16, 18, 18, 20, 23, 16, 23, 24, 13], \"tags\": [8, 9, 2, 13, 8, 21, 10, 18, 11, 10, 10, 15, 5, 19, 15, 20, 8, 9, 11, 10, 10, 8, 9, 7, 13, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Brie\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Giant\", \"Hypermarket\", \"MetLand\", \"Jonggol\"], \"arcs\": [0, 1, 2, 3], \"tags\": [5, 10, 10, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Teori\", \"yang\", \"digunakan\", \"dalam\", \"pengobatan\", \"didasarkan\", \"pada\", \"beberapa\", \"acuan\", \"filsafat\", \"termasuk\", \"teori\", \"Yin-yang\", \",\", \"lima\", \"unsur\", \"(\", \"Wu-xing\", \")\", \",\", \"sistem\", \"meridian\", \"tubuh\", \"manusia\", \"(\", \"Jing-luo\", \")\", \",\", \"teori\", \"organ\", \"Zang\", \"Fu\", \",\", \"dan\", \"lainnya\", \".\"], \"arcs\": [6, 3, 1, 5, 3, 0, 9, 9, 6, 9, 6, 11, 12, 16, 16, 12, 18, 16, 18, 21, 12, 21, 21, 23, 26, 23, 26, 29, 12, 29, 29, 31, 35, 35, 12, 6], \"tags\": [6, 6, 7, 8, 9, 5, 8, 4, 9, 14, 20, 17, 10, 11, 22, 18, 11, 12, 11, 11, 18, 14, 14, 14, 11, 12, 11, 11, 18, 14, 10, 10, 11, 16, 18, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"25\", \"Oktober\", \"1985\"], \"arcs\": [2, 0, 2], \"tags\": [22, 5, 22]}}\r\n",
      "{\"translation\": {\"texts\": [\"Ia\", \"adalah\", \"penggagas\", \"dan\", \"anggota\", \"komite\", \"persiapan\", \"dan\", \"memerintah\", \"dewan\", \"Lembaga\", \"Penelitian\", \"Perdamaian\", \"Internasional\", \"Stockholm\", \".\"], \"arcs\": [3, 3, 0, 5, 3, 5, 6, 9, 3, 9, 10, 11, 12, 13, 14, 3], \"tags\": [2, 3, 5, 16, 18, 14, 14, 16, 18, 17, 14, 14, 10, 10, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Singosaren\", \"Plaza\"], \"arcs\": [0, 1], \"tags\": [5, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"International\", \"Organization\", \"for\", \"Standardization\", \",\", \"1992\", \".\"], \"arcs\": [0, 1, 2, 3, 1, 1, 1], \"tags\": [5, 10, 10, 10, 11, 22, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Gears\", \"dapat\", \"diunduh\", \"di\", \"alamat\", \".\"], \"arcs\": [3, 3, 0, 5, 3, 3], \"tags\": [6, 15, 5, 8, 9, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Salignac\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"1979\", \".\", \"Nagarakretagama\", \"dan\", \"Tafsir\", \"Sejarahnya\", \".\"], \"arcs\": [3, 1, 0, 5, 3, 5, 3], \"tags\": [22, 11, 5, 16, 18, 14, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Surawisesa\", \"adalah\", \"Pengganti\", \"Sri\", \"Baduga\", \"Maharaja\", \",\", \"Surawisesa\", \"(\", \"puteranya\", \"dari\", \"Mayang\", \"Sunda\", \"dan\", \"juga\", \"cucu\", \"Prabu\", \"Susuktunggal\", \")\", \".\"], \"arcs\": [3, 3, 0, 3, 4, 5, 8, 3, 10, 8, 12, 10, 12, 16, 16, 10, 16, 17, 10, 3], \"tags\": [2, 3, 5, 10, 10, 10, 11, 18, 11, 12, 8, 21, 10, 16, 15, 18, 10, 10, 11, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Potensi\", \"desa\"], \"arcs\": [0, 1], \"tags\": [5, 14]}}\r\n",
      "{\"translation\": {\"texts\": [\"6\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Perdana\", \"Menteri\", \"Interim\"], \"arcs\": [0, 1, 2], \"tags\": [5, 10, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Pada\", \"puncaknya\", \",\", \"antara\", \"1919\", \"dan\", \"1939\", \",\", \"imperium\", \"kolonial\", \"Perancis\", \"kedua\", \"membentang\", \"hingga\", \"12.347.000\", \"kilometer\", \"persegi\", \"(\", \"4.767.000\", \"sq\", \"mi\", \")\", \".\"], \"arcs\": [2, 13, 13, 13, 13, 7, 5, 5, 13, 9, 10, 10, 0, 16, 16, 13, 16, 20, 20, 17, 20, 20, 13], \"tags\": [8, 9, 11, 8, 22, 16, 18, 11, 2, 14, 10, 22, 5, 8, 22, 9, 14, 11, 22, 12, 13, 11, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Lissey\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Lebih\", \"jauh\", \"lagi\", \",\", \"suatu\", \"kisah\", \"menceritakan\", \"bahwa\", \"pilihan\", \"tersebut\", \"berasal\", \"dari\", \"kaum\", \"pekerja\", \"pelabuhan\", \",\", \"seperti\", \"di\", \"kota\", \"Liverpool\", \",\", \"yang\", \"memotong\", \"pendek\", \"rambut\", \"mereka\", \"untuk\", \"menghindari\", \"kutu\", \"yang\", \"banyak\", \"terdapat\", \"di\", \"sekitar\", \"pelabuhan\", \".\"], \"arcs\": [2, 7, 2, 7, 6, 7, 0, 11, 11, 9, 7, 13, 11, 13, 14, 13, 19, 19, 13, 19, 11, 23, 11, 23, 23, 25, 28, 23, 28, 32, 32, 29, 35, 35, 32, 7], \"tags\": [15, 15, 15, 11, 4, 2, 5, 19, 2, 4, 25, 8, 9, 14, 14, 11, 8, 8, 21, 10, 11, 2, 7, 13, 17, 4, 8, 24, 17, 2, 15, 7, 8, 8, 9, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Kriya\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Dilengkapi\", \"dengan\", \"beton\", \"dan\", \"besi\", \",\", \"dinding\", \"Plexiglas\", \"dan\", \"atap\", \"melengkungnya\", \"akan\", \"membolehkan\", \"tamu\", \"melihat\", \"ikan\", \"dan\", \"makhluk\", \"laut\", \"lainnya\", \".\"], \"arcs\": [0, 3, 1, 5, 3, 7, 3, 7, 11, 11, 1, 13, 11, 13, 14, 15, 18, 16, 18, 19, 1], \"tags\": [5, 8, 9, 16, 18, 11, 18, 10, 16, 15, 18, 15, 24, 17, 7, 17, 16, 18, 14, 13, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"B\", \":\", \"Dan\", \"apakah\", \"kamu\", \"sudah\", \"membeli\", \"sesuatu\", \"hadiah\", \"istimewa\", \"buat\", \"paman\", \"mu\", \"?\"], \"arcs\": [0, 1, 7, 7, 7, 7, 1, 7, 8, 9, 12, 8, 12, 1], \"tags\": [5, 11, 16, 15, 2, 15, 26, 17, 14, 13, 8, 21, 14, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"\\\"\", \"Im\", \"Not\", \"That\", \"Girl\", \"\\\"\"], \"arcs\": [2, 0, 2, 3, 4, 2], \"tags\": [11, 5, 10, 10, 10, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Si\", \"Djontahali\", \",\", \"Toean\", \"Van\", \"Mariah\", \"Bandar\"], \"arcs\": [0, 1, 1, 1, 4, 5, 6], \"tags\": [5, 10, 11, 12, 10, 10, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"China\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Geografi\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Transportasi\"], \"arcs\": [0], \"tags\": [5]}}\r\n",
      "{\"translation\": {\"texts\": [\"Debu\", \",\", \"rumah\", \",tepung\", \"Sari\", \",\", \"bulu\", \"burung\", \",\", \"sepihan\", \"kulit\", \",\", \"air\", \"liur\", \",\", \"atau\", \"bulu\", \"binatang\", \"peliharaan\", \"(\", \"seperti\", \":\", \"kucing\", \",\", \"anjing\", \",\", \"spora\", \",\", \"jamur\", \")\", \".\"], \"arcs\": [0, 3, 1, 3, 4, 7, 1, 7, 10, 1, 10, 13, 1, 13, 17, 17, 1, 17, 18, 23, 23, 23, 17, 25, 23, 27, 23, 29, 23, 23, 1], \"tags\": [5, 11, 18, 14, 10, 11, 18, 14, 11, 18, 14, 11, 18, 14, 11, 16, 18, 14, 14, 11, 8, 11, 21, 11, 18, 11, 18, 11, 18, 11, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Pranala\", \"luar\"], \"arcs\": [0, 1], \"tags\": [5, 13]}}\r\n",
      "{\"translation\": {\"texts\": [\"Bagi\", \"pengendara\", \"motor\", \"menaruh\", \"seketika\", \"sepeda\", \"motornya\", \"di\", \"tengah\", \"kemacetan\", \".\"], \"arcs\": [2, 4, 2, 0, 6, 4, 6, 9, 6, 9, 4], \"tags\": [8, 9, 14, 5, 8, 9, 14, 8, 21, 14, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"ALIH\", \"Bayombong\", \",\", \"Nueva\", \"Vizcaya\"], \"arcs\": [0, 1, 1, 1, 4], \"tags\": [5, 10, 11, 12, 10]}}\r\n",
      "{\"translation\": {\"texts\": [\"Jendelanya\", \"semuanya\", \"ada\", \"4\", \"buah\", \".\"], \"arcs\": [3, 1, 0, 5, 3, 3], \"tags\": [2, 14, 5, 22, 22, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"pencari\", \"kerja\", \"yang\", \"berhasil\", \"diterima\", \"dan\", \"ditempatkan\", \"sebagai\", \"karyawan\", \"di\"], \"arcs\": [5, 1, 4, 1, 0, 7, 5, 9, 7, 7], \"tags\": [6, 14, 2, 7, 5, 16, 18, 8, 9, 8]}}\r\n",
      "{\"translation\": {\"texts\": [\"Seni\", \"tari\", \"ciptaan\", \"pendiri\", \"dinasti\", \"yang\", \"selama\", \"ini\", \"jarang\", \"dipentaskan\", \"berhubung\", \"materinya\", \"memiliki\", \"ketersinggungan\", \"dengan\", \"pihak\", \"lain\", \"mulai\", \"degelar\", \"dan\", \"dapat\", \"dinikmati\", \"alur\", \"alur\", \"ceriteranya.Tarian\", \"Dirada\", \"Meta\", \"yang\", \"menggambarkan\", \"perjuangan\", \"Mangkunegara\", \"I\", \"mau\", \"tidak\", \"mau\", \"harus\", \"keluar\", \"kandang\", \"untuk\", \"dipertunjukkan.Tidak\", \"ada\", \"alasan\", \"singgung\", \"menyinggung\", \"yang\", \"jelas\", \"bahwa\", \"seni\", \"tari\", \"dalam\", \"kejujurannya\", \"adalah\", \"cermin\", \"dan\", \"suatu\", \"kisah\", \"yang\", \"diungkapkan\", \"dalam\", \"seni\", \"untuk\", \"dikomunikasikan\", \".\"], \"arcs\": [13, 1, 2, 3, 4, 10, 10, 10, 10, 2, 10, 11, 0, 13, 16, 14, 16, 13, 18, 22, 22, 18, 33, 23, 24, 25, 26, 29, 23, 29, 30, 31, 22, 35, 37, 37, 33, 37, 40, 37, 37, 41, 44, 42, 46, 42, 53, 53, 48, 51, 48, 53, 41, 56, 56, 53, 58, 56, 60, 58, 62, 58, 13], \"tags\": [2, 14, 14, 14, 14, 2, 8, 4, 15, 7, 24, 17, 5, 17, 8, 21, 13, 20, 17, 16, 15, 18, 2, 14, 10, 10, 10, 2, 7, 17, 10, 22, 24, 15, 15, 15, 20, 17, 8, 24, 20, 17, 2, 7, 2, 13, 19, 2, 14, 8, 21, 3, 25, 16, 4, 18, 6, 7, 8, 9, 8, 24, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"20\", \".\"], \"arcs\": [0, 1], \"tags\": [5, 11]}}\r\n",
      "{\"translation\": {\"texts\": [\"Alamat\"], \"arcs\": [0], \"tags\": [5]}}\r\n"
     ]
    }
   ],
   "source": [
    "!head -n 100 shuffled-train.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
